library(Seurat)
library(ggplot2)
folder="~/corona_project/plots_new/"
batch_list=list("P2","P3")
batch_data_list=list("P2"=1,"P3"=1)
sce_3_1_1_5000_23_new=list("1","2")
for( i in 1:length(batch_list))
{
print(batch_list[[i]])
s_object=Read10X(paste("~/corona_project/Input_files/",batch_list[[i]],sep=""))
s_object=CreateSeuratObject(counts =s_object, min.cells = 0, min.features = 400, project = "P23")
s_object[["percent.mt"]] <- PercentageFeatureSet(s_object, pattern = "^MT-")
s_object <- subset(s_object, subset = nFeature_RNA >100 & nFeature_RNA <8000 & percent.mt <10)
s_object@meta.data[, "run"] <- batch_list[i]
s_object=NormalizeData(s_object)
batch_data_list[[i]]=FindVariableFeatures(s_object, selection.method = "dispersion", nfeatures =5000)
run.combined <- ScaleData(batch_data_list[[i]], verbose = FALSE)
run.combined <- RunPCA(run.combined, npcs = 30, verbose = FALSE)
# t-SNE and Clustering
run.combined <- RunUMAP(run.combined, reduction = "pca", dims = 1:30)
run.combined <- FindNeighbors(run.combined, reduction = "pca", dims = 1:30)
sce_3_1_1_5000_23_new[[i]] <- FindClusters(run.combined, resolution = 0.5)
}
## [1] "P2"
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 01:13:11 UMAP embedding parameters a = 0.9922 b = 1.112
## 01:13:11 Read 10881 rows and found 30 numeric columns
## 01:13:11 Using Annoy for neighbor search, n_neighbors = 30
## 01:13:11 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 01:13:13 Writing NN index file to temp file /tmp/RtmpUuGHad/file1cfd2e2e5afd
## 01:13:13 Searching Annoy index using 1 thread, search_k = 3000
## 01:13:17 Annoy recall = 100%
## 01:13:17 Commencing smooth kNN distance calibration using 1 thread
## 01:13:18 Initializing from normalized Laplacian + noise
## 01:13:19 Commencing optimization for 200 epochs, with 457516 positive edges
## 01:13:27 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10881
## Number of edges: 391628
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9502
## Number of communities: 24
## Elapsed time: 1 seconds
## [1] "P3"
## 01:17:03 UMAP embedding parameters a = 0.9922 b = 1.112
## 01:17:03 Read 5415 rows and found 30 numeric columns
## 01:17:03 Using Annoy for neighbor search, n_neighbors = 30
## 01:17:03 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 01:17:04 Writing NN index file to temp file /tmp/RtmpUuGHad/file1cfd53303880
## 01:17:04 Searching Annoy index using 1 thread, search_k = 3000
## 01:17:06 Annoy recall = 100%
## 01:17:06 Commencing smooth kNN distance calibration using 1 thread
## 01:17:06 Initializing from normalized Laplacian + noise
## 01:17:07 Commencing optimization for 500 epochs, with 220558 positive edges
## 01:17:16 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5415
## Number of edges: 177316
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9532
## Number of communities: 25
## Elapsed time: 0 seconds
p2<-RenameIdents(sce_3_1_1_5000_23_new[[1]],`0`="Fibroblasts", `1`="CD8T cells",`2`="Pericytes", `3`="Olfactory HBCs", `4`="Pericytes",
`5`="Vascular Smooth Muscle cells", `6`="Macrophages", `7`="Subtentacular Cells", `8`="Neuron Cells",
`9`="Plasma Cells", `10`="Bowman's Gland", `11`="Bowman's Gland", `12`="Respiratory HBC Cells",
`13`="Olfactory Ensheathing Glia", `14`="Dendritic cells", `15`="Monocytes", `16`="Subtentacular Cells",
`17`="Natural Killer cells", `18`="Fibroblasts", `19`="Mast Cells", `20`="Fibroblasts", `21`="Olfactory Progenator Cells",
`22`="GBCs", `23`="Pericytes" )
p3<-RenameIdents(sce_3_1_1_5000_23_new[[2]],`0`="Pericytes", `1`="Fibroblasts",`2`="CD8T cells", `3`="Vascular Smooth Muscle cells",
`4`="Plasma Cells", `5`="Vascular Smooth Muscle cells", `6`="Pericytes", `7`="Olfactory HBCs",
`8`="CD4T Cells", `9`="B Cells", `10`="Bowman's Gland", `11`="Olfactory HBCs", `12`="Immature Neurons",
`13`="Olfactory Microvillar Cells", `14`="Natural Killer cells", `15`="Subtentacular Cells", `16`="Monocytes",
`17`="Subtentacular Cells", `18`="Dendritic cells", `19`="Olfactory Ensheathing Glia", `20`="GBCs",
`21`="Mature Neurons", `22`="Plasma Cells", `23`="Macrophages" ,`24`="Mast Cells")
p2s=subset(p2, idents = names(table(Idents(p2)))[c(4,8,10,18,7)])
p3s=subset(p3, idents = names(table(Idents(p3)))[c(17,18,10,13,11,6,9)])
a=table(Seurat::Idents(p2s)[colnames(p2s[,as.matrix(p2s@assays$RNA["ACE2",])>0])])
df=data.frame(cell_type=names(a),count=a)
ggplot(data=df, aes(x=cell_type, y=count.Freq)) +
geom_bar(stat="identity") + theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))

a=table(Seurat::Idents(p3s)[colnames(p3s[,as.matrix(p3s@assays$RNA["ACE2",])>0])])
df=data.frame(cell_type=names(a),count=a)
ggplot(data=df, aes(x=cell_type, y=count.Freq)) +
geom_bar(stat="identity") + theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))

DimPlot(p3s)

DimPlot(p2s)

genes_marker=c("ACE2",
"TP63","KRT5","CXCL14","SOX2","MEG3",
"ASCL3","CFTR","HEPACAM2","FOXL1",
"GNG8","OLIG2","EBF2","LHX2","CBX8",
"GNG13","EBF2,","CBX8","RTP1",
"HES6","ASCL1","CXCR4","SOX2","EZH2","NEUROD1","NEUROG1",
"CYP2A13","CYP2J2","GPX6","ERMN","SOX2",
"SOX9", "SOX10", "MUC5", "GPX3")
genes_index=c(1,2,7,11,16,20,27,33,37)
cells_marker=c("ACE2",names(table(Idents(p3s)))[c(1,4,3,7,6,5,2)])
for(i in 1:length(cells_marker))
{
print(i)
print(FeaturePlot(p2s, features = genes_marker[genes_index[i]:(genes_index[i+1]-1)],combine = FALSE,cols=c("lightgrey", "red")))
print(FeaturePlot(p3s, features = genes_marker[genes_index[i]:(genes_index[i+1]-1)],combine = FALSE,cols=c("lightgrey", "red")))
}
## [1] 1
## [[1]]

##
## [[1]]

##
## [1] 2
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]
##
## [1] 3
## Warning in FeaturePlot(p2s, features = genes_marker[genes_index[i]:
## (genes_index[i + : All cells have the same value (0) of FOXL1.

## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [1] 4
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]
##
## [1] 5
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: EBF2,

## [[1]]

##
## [[2]]

##
## [[3]]
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: EBF2,

## [[1]]

##
## [[2]]

##
## [[3]]

##
## [1] 6
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [1] 7
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]
##
## [1] 8
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: MUC5, NA

## [[1]]

##
## [[2]]
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: MUC5, NA

## [[1]]

##
## [[2]]

p2sHBC=subset(p2, idents = names(table(Idents(p2)))[c(4)])
pos=c(rep(1,length(colnames(p2sHBC[,as.matrix(p2sHBC@assays$RNA["ACE2",])>0]))))
names(pos)=colnames(p2sHBC[,as.matrix(p2sHBC@assays$RNA["ACE2",])>0])
neg=c(rep(-1,length(colnames(p2sHBC[,as.matrix(p2sHBC@assays$RNA["ACE2",])<=0]))))
names(neg)=colnames(p2sHBC[,as.matrix(p2sHBC@assays$RNA["ACE2",])<=0])
pos_neg=c(pos,neg)
sum(pos_neg[which(names(pos_neg)%in%rownames(p2sHBC@meta.data))]==pos_neg)
## [1] 785
p2sHBC=AddMetaData(
object = p2sHBC,
metadata = pos_neg,
col.name = 'HBC'
)
Idents(p2sHBC)=pos_neg
sum(pos_neg[which(names(pos_neg)%in%names(Idents(p2sHBC)))]==pos_neg)
## [1] 785
p2_HBC_genes=FindMarkers(p2sHBC, ident.1=1,ident.2=-1,test.use = "wilcox")
p2_HBC_genes_a=rownames(p2_HBC_genes)[p2_HBC_genes[2]>1 & p2_HBC_genes[1]<0.05]
p2_HBC_genes_b=rownames(p2_HBC_genes)[p2_HBC_genes[2]<-1 & p2_HBC_genes[1]<0.05]
p2_HBC_genes_names=c(p2_HBC_genes_a,p2_HBC_genes_b)
p2sHBC_1 <- RunPCA(p2sHBC, npcs = 30, verbose = FALSE)
p2sHBC_1 = RunUMAP(p2sHBC_1, reduction = "pca", dims = 1:30)
## 01:18:02 UMAP embedding parameters a = 0.9922 b = 1.112
## 01:18:02 Read 785 rows and found 30 numeric columns
## 01:18:02 Using Annoy for neighbor search, n_neighbors = 30
## 01:18:02 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 01:18:02 Writing NN index file to temp file /tmp/RtmpUuGHad/file1cfd29a7cc6b
## 01:18:02 Searching Annoy index using 1 thread, search_k = 3000
## 01:18:03 Annoy recall = 100%
## 01:18:03 Commencing smooth kNN distance calibration using 1 thread
## 01:18:04 Initializing from normalized Laplacian + noise
## 01:18:04 Commencing optimization for 500 epochs, with 36342 positive edges
## 01:18:06 Optimization finished
p2sHBC_1=FindNeighbors(p2sHBC_1, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
p2sHBC_1=FindClusters(p2sHBC_1, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 785
## Number of edges: 45579
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.5599
## Number of communities: 3
## Elapsed time: 0 seconds
DimPlot(p2sHBC_1,pt.size = 1)

p3sHBC=subset(p3, idents = names(table(Idents(p3)))[c(6)])
pos=c(rep(1,length(colnames(p3sHBC[,as.matrix(p3sHBC@assays$RNA["ACE2",])>0]))))
names(pos)=colnames(p3sHBC[,as.matrix(p3sHBC@assays$RNA["ACE2",])>0])
neg=c(rep(-1,length(colnames(p3sHBC[,as.matrix(p3sHBC@assays$RNA["ACE2",])<=0]))))
names(neg)=colnames(p3sHBC[,as.matrix(p3sHBC@assays$RNA["ACE2",])<=0])
pos_neg=c(pos,neg)
sum(pos_neg[which(names(pos_neg)%in%rownames(p3sHBC@meta.data))]==pos_neg)
## [1] 492
p3sHBC=AddMetaData(
object = p3sHBC,
metadata = pos_neg,
col.name = 'HBC'
)
Idents(p3sHBC)=pos_neg
sum(pos_neg[which(names(pos_neg)%in%names(Idents(p3sHBC)))]==pos_neg)
## [1] 492
p3_HBC_genes=FindMarkers(p3sHBC, ident.1=1,ident.2=-1,test.use = "wilcox")
p3_HBC_genes_a=rownames(p3_HBC_genes)[p3_HBC_genes[2]>1 & p3_HBC_genes[1]<0.05]
p3_HBC_genes_b=rownames(p3_HBC_genes)[p3_HBC_genes[2]<-1 & p3_HBC_genes[1]<0.05]
p3_HBC_genes_names=c(p3_HBC_genes_a,p3_HBC_genes_b)
p3sHBC_1 <- RunPCA(p3sHBC, npcs = 30, verbose = FALSE)
p3sHBC_1 = RunUMAP(p3sHBC_1, reduction = "pca", dims = 1:30)
## 01:18:15 UMAP embedding parameters a = 0.9922 b = 1.112
## 01:18:15 Read 492 rows and found 30 numeric columns
## 01:18:15 Using Annoy for neighbor search, n_neighbors = 30
## 01:18:15 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 01:18:15 Writing NN index file to temp file /tmp/RtmpUuGHad/file1cfdb551686
## 01:18:15 Searching Annoy index using 1 thread, search_k = 3000
## 01:18:16 Annoy recall = 100%
## 01:18:16 Commencing smooth kNN distance calibration using 1 thread
## 01:18:17 Initializing from normalized Laplacian + noise
## 01:18:17 Commencing optimization for 500 epochs, with 21864 positive edges
## 01:18:19 Optimization finished
p3sHBC_1=FindNeighbors(p3sHBC, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
p3sHBC_1=FindClusters(p3sHBC_1, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 492
## Number of edges: 19533
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7589
## Number of communities: 4
## Elapsed time: 0 seconds
DimPlot(p3sHBC_1,pt.size = 1)

p3sMicro=subset(p3, idents = names(table(Idents(p3)))[c(11)])
pos=c(rep(1,length(colnames(p3sMicro[,as.matrix(p3sMicro@assays$RNA["ACE2",])>0]))))
names(pos)=colnames(p3sMicro[,as.matrix(p3sMicro@assays$RNA["ACE2",])>0])
neg=c(rep(-1,length(colnames(p3sMicro[,as.matrix(p3sMicro@assays$RNA["ACE2",])<=0]))))
names(neg)=colnames(p3sMicro[,as.matrix(p3sMicro@assays$RNA["ACE2",])<=0])
pos_neg=c(pos,neg)
sum(pos_neg[which(names(pos_neg)%in%rownames(p3sMicro@meta.data))]==pos_neg)
## [1] 102
p3sMicro=AddMetaData(
object = p3sMicro,
metadata = pos_neg,
col.name = 'Micro'
)
Idents(p3sMicro)=pos_neg
sum(pos_neg[which(names(pos_neg)%in%names(Idents(p3sMicro)))]==pos_neg)
## [1] 102
p3_Micro_genes=FindMarkers(p3sMicro, ident.1=1,ident.2=-1,test.use = "wilcox")
p3_Micro_genes_a=rownames(p3_Micro_genes)[p3_Micro_genes[2]>1 & p3_Micro_genes[1]<0.05]
p3_Micro_genes_b=rownames(p3_Micro_genes)[p3_Micro_genes[2]<-1 & p3_Micro_genes[1]<0.05]
p3_Micro_genes_names=c(p3_Micro_genes_a,p3_Micro_genes_b)
p3sMicro_1 <- RunPCA(p3sMicro, npcs = 30, verbose = FALSE)
p3sMicro_1 = RunUMAP(p3sMicro_1, reduction = "pca", dims = 1:30)
## 01:18:20 UMAP embedding parameters a = 0.9922 b = 1.112
## 01:18:20 Read 102 rows and found 30 numeric columns
## 01:18:20 Using Annoy for neighbor search, n_neighbors = 30
## 01:18:20 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 01:18:20 Writing NN index file to temp file /tmp/RtmpUuGHad/file1cfd2102b448
## 01:18:20 Searching Annoy index using 1 thread, search_k = 3000
## 01:18:20 Annoy recall = 100%
## 01:18:21 Commencing smooth kNN distance calibration using 1 thread
## 01:18:22 Initializing from normalized Laplacian + noise
## 01:18:22 Commencing optimization for 500 epochs, with 4264 positive edges
## 01:18:22 Optimization finished
p3sMicro_1=FindNeighbors(p3sMicro, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
p3sMicro_1=FindClusters(p3sMicro_1, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 102
## Number of edges: 3360
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.5284
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(p3sMicro_1,pt.size = 1)
